#ifndef __FLOW_DIFFUSION_CUDA_H__
#define __FLOW_DIFFUSION_CUDA_H__

	// Identifiers of the entropy-volume implementations. Each value is compared
	// against COMPUTE_ENTROPY_VOLUME in the conditional sections of this header.
	#define COMPUTE_ENTROPY_VOLUME_HOST                          0x01
	#define COMPUTE_ENTROPY_VOLUME_CUDA                          0x02
	#define COMPUTE_ENTROPY_VOLUME_WITH_SORTING_CUDA             0x03
	#define COMPUTE_ENTROPY_VOLUME_PER_SCANLINE_CUDA             0x04
	#define COMPUTE_ENTROPY_VOLUME_SORT_PER_VOXEL_CUDA           0x05
	#define COMPUTE_ENTROPY_VOLUME_ON_SPARSE_HISTOGRAM_CUDA      0x06
	#define COMPUTE_ENTROPY_VOLUME_WITH_MARGINAL_HISTOGRAM       0x07
	#define COMPUTE_ENTROPY_VOLUME_WITH_SORTING_IN_VECTORS_CUDA  0x08

	// Implementation selected for this build; must be one of the identifiers above.
	#define COMPUTE_ENTROPY_VOLUME  COMPUTE_ENTROPY_VOLUME_WITH_SORTING_IN_VECTORS_CUDA

	// Identifiers of the values SCANNING_METHOD may take.
	#define SCANNING_METHOD_SCAN_WHOLE_HISTOGRAM          0x11
	#define SCANNING_METHOD_HIERARCHICAL_SCAN             0x12
	#define SCANNING_METHOD_SCAN_ROWS_IN_PARALLEL         0x13
	#define SCANNING_METHOD_SKIP_WITH_MARGINAL_HISTOGRAM  0x14

	// Choose SCANNING_METHOD from the selected entropy-volume implementation.
	// Only the CUDA, per-scanline CUDA and host implementations get a value;
	// for every other COMPUTE_ENTROPY_VOLUME setting SCANNING_METHOD stays undefined.
	#if (COMPUTE_ENTROPY_VOLUME == COMPUTE_ENTROPY_VOLUME_CUDA) || (COMPUTE_ENTROPY_VOLUME == COMPUTE_ENTROPY_VOLUME_PER_SCANLINE_CUDA)
		// Alternative previously noted for the per-scanline case: SCANNING_METHOD_SCAN_WHOLE_HISTOGRAM
		#define SCANNING_METHOD  SCANNING_METHOD_SCAN_ROWS_IN_PARALLEL
	#elif COMPUTE_ENTROPY_VOLUME == COMPUTE_ENTROPY_VOLUME_HOST
		// Alternative previously noted for the host case: SCANNING_METHOD_SCAN_WHOLE_HISTOGRAM
		#define SCANNING_METHOD  SCANNING_METHOD_SKIP_WITH_MARGINAL_HISTOGRAM
	#endif

	// Define ENTROPY_VOLUME_POSTFIX as the string literal that corresponds to the
	// implementation selected through COMPUTE_ENTROPY_VOLUME. An unrecognised
	// selection stops the build here instead of leaving the macro undefined.
	#if COMPUTE_ENTROPY_VOLUME == COMPUTE_ENTROPY_VOLUME_HOST
		#define ENTROPY_VOLUME_POSTFIX "host"
	#elif COMPUTE_ENTROPY_VOLUME == COMPUTE_ENTROPY_VOLUME_CUDA
		// NOTE(review): HIERARCHICAL_SCANNING is not defined anywhere in this
		// header, so unless it is supplied from outside (another header or a -D
		// flag) the "cuda" postfix is used. Possibly the intended test is
		// SCANNING_METHOD == SCANNING_METHOD_HIERARCHICAL_SCAN -- confirm.
		// The defined() guard keeps the result identical while not relying on
		// an undefined identifier silently evaluating to 0.
		#if defined(HIERARCHICAL_SCANNING) && HIERARCHICAL_SCANNING
			#define ENTROPY_VOLUME_POSTFIX "hierarchical_cuda"
		#else
			#define ENTROPY_VOLUME_POSTFIX "cuda"
		#endif
	#elif COMPUTE_ENTROPY_VOLUME == COMPUTE_ENTROPY_VOLUME_WITH_SORTING_CUDA
		#define ENTROPY_VOLUME_POSTFIX "w_sorting_cuda"
	#elif COMPUTE_ENTROPY_VOLUME == COMPUTE_ENTROPY_VOLUME_PER_SCANLINE_CUDA
		#define ENTROPY_VOLUME_POSTFIX "per_scanline_cuda"
	#elif COMPUTE_ENTROPY_VOLUME == COMPUTE_ENTROPY_VOLUME_SORT_PER_VOXEL_CUDA
		#define ENTROPY_VOLUME_POSTFIX "per_voxel_cuda"
	#elif COMPUTE_ENTROPY_VOLUME == COMPUTE_ENTROPY_VOLUME_ON_SPARSE_HISTOGRAM_CUDA
		#define ENTROPY_VOLUME_POSTFIX "sparse_matrix_cuda"
	#elif COMPUTE_ENTROPY_VOLUME == COMPUTE_ENTROPY_VOLUME_WITH_MARGINAL_HISTOGRAM
		#define ENTROPY_VOLUME_POSTFIX "marginal_histogram_cuda"
	#elif COMPUTE_ENTROPY_VOLUME == COMPUTE_ENTROPY_VOLUME_WITH_SORTING_IN_VECTORS_CUDA
		#define ENTROPY_VOLUME_POSTFIX "w_sorting_in_vector_cuda"
	#else
		#error "COMPUTE_ENTROPY_VOLUME does not match any known COMPUTE_ENTROPY_VOLUME_* value; ENTROPY_VOLUME_POSTFIX cannot be selected"
	#endif

	// Thread-count constants.
	// NOTE(review): presumably CUDA per-block / half-warp thread counts -- confirm at the use sites.
	#define MAX_THREADS                  128
	#define NR_OF_THREADS_PER_HALF_WARP  16

	// Limit named after the number of Z blocks held in memory at once.
	// NOTE(review): exact meaning is defined by the code that consumes it -- confirm.
	#define MAX_Z_BLOCKS_IN_MEMORY       2

	// 1 * 2^27 = 134217728. The leading factor is kept so the budget can be scaled
	// in multiples of 2^27. NOTE(review): unit is presumably bytes -- confirm.
	#define MAX_MEMORY_SIZE              (1 * (1 << 27))

	#define RADIX_SORT_BITS              31

	// Timing-output switches for _ComputeEntropyVolume (0 = off in this build).
	#define _ComputeEntropyVolume_PRINT_TIMING       0
	#define _ComputeEntropyVolume_PRINT_LOOP_TIMING  0

	#define CHECK_ERROR_CONVERGENCE_BY_CUDPP         0

	#include "cuda_macro.h"

	#include "liblog.h"
	using namespace std;

	#include "libbuf.h"

////////////////////////////////////////////
	// Flow-diffusion build switches (0 = disabled, non-zero = enabled).
	#define PRINT_FLOW_DIFFUSION_TIMING         0
	#define USE_SHARED_MEMORY                   0
	#define SHOW_COMPUTE_SRC_BIN_VOLUME_TIMING  1

	// When this macro is non-zero, the volume is scanned by a for loop on the host.
	#define DIFFUSION_BY_FOR_LOOP_ON_HOST       0

	// Block dimensions. NOTE(review): presumably the CUDA thread-block extent
	// in x and y used by the kernel launches -- confirm at the launch sites.
	#define BLOCK_DIM_X  16
	#define BLOCK_DIM_Y  8

#endif	// __FLOW_DIFFUSION_CUDA_H__

